# -*- coding: utf-8 -*-
"""
# --------------------------------------------------------
# @Author : Pan
# @E-mail : 
# @Date   : 2025-09-28 18:08:41
# @Brief  : https://github.com/abetlen/llama-cpp-python
# --------------------------------------------------------
"""
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Qwen25VLChatHandler
# Path to the main LLM weights in GGUF format (must be downloaded beforehand).
model = "../../data/models/Qwen/Qwen3-VL-2B-Instruct-GGUF/Qwen3VL-2B-Instruct-Q8_0.gguf"

# Load the model.
# FIX: the original code passed the LLM GGUF file to
# Qwen25VLChatHandler(clip_model_path=...) and then called
# create_chat_completion on the handler. The chat handler only wraps the
# vision projector (the separate mmproj GGUF) and exposes no completion API;
# inference is done through the Llama object. For text-only use, Llama alone
# is sufficient. For image+text inference, pass
#   chat_handler=Qwen25VLChatHandler(clip_model_path="<mmproj-*.gguf>")
# to the Llama constructor (the mmproj file, not the LLM weights).
llm = Llama(
    model_path=model,
    n_ctx=4096,       # context window; default (512) is too small for chat
    verbose=False,    # silence llama.cpp load-time logging
)

# Plain-text chat completion.
response = llm.create_chat_completion(
    messages=[
        {"role": "user", "content": "What is the capital of France?"}
    ],
    temperature=0.7,
    max_tokens=128
)

# The OpenAI-style response dict nests the generated text under
# choices[0].message.content.
print(response["choices"][0]["message"]["content"])